import requests
import re
# Page the scraper downloads (a one-day forecast page on weather.com.cn).
url = 'http://weather.com.cn/weather1d/101010100.shtml'

# Shape of the markup the patterns below extract data from:
#   <span class="name">三亚</span>
#   <span class="weather">多云</span>
#   <span class="wd">32/27℃</span>
#   <span class="zs">适宜</span>
#
# The name/weather/zs captures only accept CJK ideographs. The "wd" capture
# is non-greedy so that several spans sharing one line are matched one by one
# instead of being swallowed up to the last "</span>" on that line.
_NAME_RE = re.compile(r'<span class="name">([\u4e00-\u9fff]*)</span>')
_WEATHER_RE = re.compile(r'<span class="weather">([\u4e00-\u9fff]*)</span>')
_WD_RE = re.compile(r'<span class="wd">(.*?)</span>')
_ZS_RE = re.compile(r'<span class="zs">([\u4e00-\u9fff]*)</span>')


def fetch_html(page_url: str = url, timeout: float = 10) -> str:
    """Download ``page_url`` and return its body decoded as UTF-8.

    Args:
        page_url: Address to fetch; defaults to the module-level ``url``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    resp = requests.get(page_url, timeout=timeout)
    # Fail loudly on an error page rather than silently extracting nothing.
    resp.raise_for_status()
    # Force the decoding used for ``resp.text``.
    resp.encoding = 'utf-8'
    return resp.text


def extract_fields(html: str):
    """Extract the four span columns from ``html``.

    Args:
        html: Page markup to search.

    Returns:
        A 4-tuple ``(city, weather, wd, zs)`` of lists of strings, one list
        per span class, each in document order. Lists are empty when nothing
        matches.
    """
    city = _NAME_RE.findall(html)
    weather = _WEATHER_RE.findall(html)
    wd = _WD_RE.findall(html)
    zs = _ZS_RE.findall(html)
    return city, weather, wd, zs


def main():
    """Fetch the page, print the raw HTML, each column, and the packed rows."""
    html = fetch_html()
    print(html)

    city, weather, wd, zs = extract_fields(html)
    print(city)

    print(weather)

    print(wd)

    print(zs)

    # Pack the columns into rows; zip() stops at the shortest column, so any
    # surplus entries in the longer columns are dropped.
    lst = [[a, b, c, d] for a, b, c, d in zip(city, weather, wd, zs)]
    print(lst)

    for item in lst:
        print(item)


if __name__ == '__main__':
    main()

